1. Import Data

Data from Kaggle: https://www.kaggle.com/parulpandey/palmer-archipelago-antarctica-penguin-data
Analysis Helpers:
- Python: https://www.kaggle.com/parulpandey/penguin-dataset-the-new-iris/notebook
- Python: https://www.kaggle.com/amandawest/penguin-dataset-the-new-iris/edit
- R: https://github.com/allisonhorst/palmerpenguins

# Load the penguin measurements from the local notes directory.
# The path is spelled out with file.path() instead of calling
# rm(list = ls()) and setwd(), so running this chunk neither wipes the
# caller's workspace nor changes the session's working directory.
data_dir <- "~/Desktop/r_notes"
penguins <- read.csv(file.path(data_dir, "penguins_size.csv"), header = TRUE)

# Copy of the data with every row that contains a missing value dropped.
penguins_no_nas <- na.omit(penguins)

# relevant libraries
library(dplyr)  # so we can use the pipe operator in part 2
library(plotly) # data visualization in part 3

2. Exploratory Analysis

# Mean of every numeric column for each species, ignoring missing values.
# The anonymous function replaces passing `na.rm = TRUE` through across()'s
# `...`, which is deprecated as of dplyr 1.1.0; the result is unchanged.
penguins %>%
  group_by(species) %>%
  summarize(across(where(is.numeric), function(x) mean(x, na.rm = TRUE)))
## # A tibble: 3 x 5
##   species   culmen_length_mm culmen_depth_mm flipper_length_mm body_mass_g
##   <chr>                <dbl>           <dbl>             <dbl>       <dbl>
## 1 Adelie                38.8            18.3              190.       3701.
## 2 Chinstrap             48.8            18.4              196.       3733.
## 3 Gentoo                47.5            15.0              217.       5076.

Next is the correlation matrix, which gives some insight into how our scatter plots will look:

# Pairwise correlations (cor()'s default method) between three measurements.
# Columns are selected by name rather than by position (3, 5, 6) so the
# result cannot silently change if the CSV's column order does.
cor_columns <- c("culmen_length_mm", "flipper_length_mm", "body_mass_g")

# Drop rows with a missing value in any of the selected columns first,
# since cor() would otherwise return NA for the affected pairs.
penguins_cor <- na.omit(penguins[, cor_columns])
res <- cor(penguins_cor)
round(res, 2)
##                   culmen_length_mm flipper_length_mm body_mass_g
## culmen_length_mm              1.00              0.66        0.60
## flipper_length_mm             0.66              1.00        0.87
## body_mass_g                   0.60              0.87        1.00

3. Data Visualization

# Scatter plot of body mass against flipper length, with marker colour and
# size both mapped to flipper length.
# The trace type and mode are stated explicitly so plotly does not have to
# guess them (and print "No trace type specified" messages) on each render.
plot_ly(
  penguins,
  x = ~flipper_length_mm,
  y = ~body_mass_g,
  color = ~flipper_length_mm,
  size = ~flipper_length_mm,
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    xaxis = list(title = "Flipper Length (mm)"),
    yaxis = list(title = "Body Mass (g)")
  )
# Scatter plot of body mass against flipper length, with marker colour
# mapped to species and marker size mapped to flipper length.
# The trace type and mode are stated explicitly so plotly does not have to
# guess them (and print "No trace type specified" messages) on each render.
plot_ly(
  penguins,
  x = ~flipper_length_mm,
  y = ~body_mass_g,
  color = ~species,
  size = ~flipper_length_mm,
  type = "scatter",
  mode = "markers"
) %>%
  layout(
    xaxis = list(title = "Flipper Length (mm)"),
    yaxis = list(title = "Body Mass (g)")
  )


Finis!